Practice Lesson 1: The Quanteda Suite - Sandbox-Edition



This script needs no external data; it only uses datasets that ship with quanteda. If you have trouble setting your working directory, please reach out on Slack and we will help you get ready for the next session, in which we will load external data.

Packages

## load required libraries
library(tidyverse)
library(quanteda)
library(lexicon)
library(reshape2)
library(stringi)
library(quanteda.textplots)
library(gridExtra)
## clean workspace: remove every object that ls() lists in the current
## environment; packages attached by the library() calls above stay attached
rm(list=ls())

Clean workspace and set working directory

This step is only for those who use their own corpora.

## set working directory (WD)
## Only needed if you work with your own corpora; the quanteda data used in
## this lesson loads without it.
path <- '~/coliphi21/practice_lessons/lesson_1/src/'
## setwd() stops with an error when the directory does not exist, which would
## abort the whole script on any machine without this folder. Report the
## problem instead and keep the current working directory.
invisible(tryCatch(
  setwd(path),
  error = function(e) {
    message(
      "Working directory not changed ('", path, "'): ", conditionMessage(e)
    )
  }
))
## Quickfix: setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

Import data

For this tutorial we will use the quanteda corpus data_corpus_inaugural, which contains the inaugural addresses of US presidents since 1789. In the next session, you will be able to work with your own data.

## use the inaugural-address corpus that ships with quanteda
df <- quanteda::data_corpus_inaugural

Inspect data

## what does the corpus object look like?
## (typing the object's name at top level prints it)
df
## Corpus consisting of 59 documents and 4 docvars.
## 1789-Washington :
## "Fellow-Citizens of the Senate and of the House of Representa..."
## 
## 1793-Washington :
## "Fellow citizens, I am again called upon by the voice of my c..."
## 
## 1797-Adams :
## "When it was first perceived, in early times, that no middle ..."
## 
## 1801-Jefferson :
## "Friends and Fellow Citizens: Called upon to undertake the du..."
## 
## 1805-Jefferson :
## "Proceeding, fellow citizens, to that qualification which the..."
## 
## 1809-Madison :
## "Unwilling to depart from examples of the most revered author..."
## 
## [ reached max_ndoc ... 53 more documents ]
## summary statistics (first rows only)
head(summary(df))
## which class(es) does the object have?
class(df)
## [1] "corpus"    "character"
## how much memory does it occupy?
object.size(df)
## 838560 bytes
## what does the internal data structure look like?
str(df)
##  'corpus' Named chr [1:59] "Fellow-Citizens of the Senate and of the House of Representatives:\n\nAmong the vicissitudes incident to life n"| __truncated__ "Fellow citizens, I am again called upon by the voice of my country to execute the functions of its Chief Magist"| __truncated__ "When it was first perceived, in early times, that no middle course for America remained between unlimited submi"| __truncated__ "Friends and Fellow Citizens:\n\nCalled upon to undertake the duties of the first executive office of our countr"| __truncated__ "Proceeding, fellow citizens, to that qualification which the Constitution requires before my entrance on the ch"| __truncated__ "Unwilling to depart from examples of the most revered authority, I avail myself of the occasion now presented t"| __truncated__ "About to add the solemnity of an oath to the obligations imposed by a second call to the station in which my co"| __truncated__ ...
##  - attr(*, "names")= chr [1:59] "1789-Washington" "1793-Washington" "1797-Adams" "1801-Jefferson" ...
##  - attr(*, "docvars")='data.frame':  59 obs. of  7 variables:
##   ..$ docname_ : chr [1:59] "1789-Washington" "1793-Washington" "1797-Adams" "1801-Jefferson" ...
##   ..$ docid_   : Factor w/ 59 levels "1789-Washington",..: 1 2 3 4 5 6 7 8 9 10 ...
##   ..$ segid_   : int [1:59] 1 1 1 1 1 1 1 1 1 1 ...
##   ..$ Year     : int [1:59] 1789 1793 1797 1801 1805 1809 1813 1817 1821 1825 ...
##   ..$ President: chr [1:59] "Washington" "Washington" "Adams" "Jefferson" ...
##   ..$ FirstName: chr [1:59] "George" "George" "John" "Thomas" ...
##   ..$ Party    : Factor w/ 6 levels "Democratic","Democratic-Republican",..: 4 4 3 2 2 2 2 2 2 2 ...
##  - attr(*, "meta")=List of 3
##   ..$ system:List of 5
##   .. ..$ package-version:Classes 'package_version', 'numeric_version'  hidden list of 1
##   .. .. ..$ : int [1:3] 3 0 0
##   .. ..$ r-version      :Classes 'R_system_version', 'package_version', 'numeric_version'  hidden list of 1
##   .. .. ..$ : int [1:3] 4 0 2
##   .. ..$ system         : Named chr [1:3] "Darwin" "x86_64" "smueller"
##   .. .. ..- attr(*, "names")= chr [1:3] "sysname" "machine" "user"
##   .. ..$ directory      : chr "/Users/smueller/Documents/GitHub/quanteda"
##   .. ..$ created        : Date[1:1], format: "2021-08-09"
##   ..$ object:List of 2
##   .. ..$ unit   : chr "documents"
##   .. ..$ summary:List of 2
##   .. .. ..$ hash: chr(0) 
##   .. .. ..$ data: NULL
##   ..$ user  :List of 6
##   .. ..$ description: chr "Transcripts of all inaugural addresses delivered by United States Presidents, from Washington 1789 onward.  Dat"| __truncated__
##   .. ..$ source     : chr "Gerhard Peters and John T. Woolley. The American Presidency Project."
##   .. ..$ url        : chr "https://www.presidency.ucsb.edu/documents/presidential-documents-archive-guidebook/inaugural-addresses"
##   .. ..$ author     : chr "(various US Presidents)"
##   .. ..$ keywords   : chr [1:5] "political" "US politics" "United States" "presidents" ...
##   .. ..$ title      : chr "US presidential inaugural address speeches"

Interacting with the data

Document variables

## first rows of the document-level variables (docvars)
head(docvars(df))
## open the help page for table()
?table
## Help on topic 'table' was found in the following packages:
## 
##   Package               Library
##   vctrs                 /home/redapemusic35/R/x86_64-pc-linux-gnu-library/4.0
##   base                  /usr/lib/R/library

> Exercise

Task

Compute the number of Democratic and Republican speeches. Hint: check out the table function.

Solution
## count the speeches per party, reading the docvar through docvars()
table(docvars(df, "Party"))
## 
##            Democratic Democratic-Republican            Federalist                  none            Republican                  Whig 
##                    22                     7                     1                     2                    24                     3

Selecting documents

## text data: how can we look at Biden's 2021 speech?
## step 1: convert the corpus into `txt` with as.character()
txt <- as.character(df)
## step 2: list the names of `txt`
names(txt)
##  [1] "1789-Washington" "1793-Washington" "1797-Adams"      "1801-Jefferson"  "1805-Jefferson"  "1809-Madison"    "1813-Madison"    "1817-Monroe"     "1821-Monroe"     "1825-Adams"      "1829-Jackson"    "1833-Jackson"    "1837-VanBuren"   "1841-Harrison"   "1845-Polk"       "1849-Taylor"     "1853-Pierce"     "1857-Buchanan"   "1861-Lincoln"    "1865-Lincoln"    "1869-Grant"      "1873-Grant"      "1877-Hayes"      "1881-Garfield"   "1885-Cleveland"  "1889-Harrison"   "1893-Cleveland"  "1897-McKinley"   "1901-McKinley"   "1905-Roosevelt"  "1909-Taft"       "1913-Wilson"     "1917-Wilson"     "1921-Harding"    "1925-Coolidge"   "1929-Hoover"     "1933-Roosevelt"  "1937-Roosevelt"  "1941-Roosevelt"  "1945-Roosevelt"  "1949-Truman"     "1953-Eisenhower" "1957-Eisenhower" "1961-Kennedy"    "1965-Johnson"    "1969-Nixon"      "1973-Nixon"      "1977-Carter"     "1981-Reagan"     "1985-Reagan"     "1989-Bush"       "1993-Clinton"    "1997-Clinton"    "2001-Bush"       "2005-Bush"      
## [56] "2009-Obama"      "2013-Obama"      "2017-Trump"      "2021-Biden"
## subsetting with logical vectors: TRUE keeps an element, FALSE drops it
c("hi", "hello", "bye")[c(TRUE, FALSE, TRUE)]
## [1] "hi"  "bye"
## an index longer than the vector yields NA for the extra position
c("hi", "hello", "bye")[c(TRUE, FALSE, TRUE, TRUE)]
## [1] "hi"  "bye" NA
## an index shorter than the vector is recycled (TRUE, FALSE, TRUE)
c("hi", "hello", "bye")[c(TRUE, FALSE)]
## [1] "hi"  "bye"
## grepl() returns TRUE for every name containing "Biden"; use it as the index
biden <- txt[grepl(pattern = "Biden", x = names(txt))]
## print the raw speech text
cat(biden)
## Chief Justice Roberts, Vice President Harris, Speaker Pelosi, Leader Schumer, Leader McConnell, Vice President Pence, distinguished guests, and my fellow Americans.
## 
## This is America's day.
## 
## This is democracy's day.
## 
## A day of history and hope.
## 
## Of renewal and resolve.
## 
## Through a crucible for the ages America has been tested anew and America has risen to the challenge.
## 
## Today, we celebrate the triumph not of a candidate, but of a cause, the cause of democracy.
## 
## The will of the people has been heard and the will of the people has been heeded.
## 
## We have learned again that democracy is precious.
## 
## Democracy is fragile.
## 
## And at this hour, my friends, democracy has prevailed.
## 
## So now, on this hallowed ground where just days ago violence sought to shake this Capitol's very foundation, we come together as one nation, under God, indivisible, to carry out the peaceful transfer of power as we have for more than two centuries.
## 
## We look ahead in our uniquely American way – restless, bold, optimistic – and set our sights on the nation we know we can be and we must be.
## 
## I thank my predecessors of both parties for their presence here.
## 
## I thank them from the bottom of my heart.
## 
## You know the resilience of our Constitution and the strength of our nation.
## 
## As does President Carter, who I spoke to last night but who cannot be with us today, but whom we salute for his lifetime of service.
## 
## I have just taken the sacred oath each of these patriots took — an oath first sworn by George Washington.
## 
## But the American story depends not on any one of us, not on some of us, but on all of us.
## 
## On "We the People" who seek a more perfect Union.
## 
## This is a great nation and we are a good people.
## 
## Over the centuries through storm and strife, in peace and in war, we have come so far. But we still have far to go.
## 
## We will press forward with speed and urgency, for we have much to do in this winter of peril and possibility.
## 
## Much to repair.
## 
## Much to restore.
## 
## Much to heal.
## 
## Much to build.
## 
## And much to gain.
## 
## Few periods in our nation's history have been more challenging or difficult than the one we're in now.
## 
## A once-in-a-century virus silently stalks the country.
## 
## It's taken as many lives in one year as America lost in all of World War II.
## 
## Millions of jobs have been lost.
## 
## Hundreds of thousands of businesses closed.
## 
## A cry for racial justice some 400 years in the making moves us. The dream of justice for all will be deferred no longer.
## 
## A cry for survival comes from the planet itself. A cry that can't be any more desperate or any more clear.
## 
## And now, a rise in political extremism, white supremacy, domestic terrorism that we must confront and we will defeat.
## 
## To overcome these challenges – to restore the soul and to secure the future of America – requires more than words.
## 
## It requires that most elusive of things in a democracy:
## 
## Unity.
## 
## Unity.
## 
## In another January in Washington, on New Year's Day 1863, Abraham Lincoln signed the Emancipation Proclamation.
## 
## When he put pen to paper, the President said, "If my name ever goes down into history it will be for this act and my whole soul is in it."
## 
## My whole soul is in it.
## 
## Today, on this January day, my whole soul is in this:
## 
## Bringing America together.
## 
## Uniting our people.
## 
## And uniting our nation.
## 
## I ask every American to join me in this cause.
## 
## Uniting to fight the common foes we face:
## 
## Anger, resentment, hatred.
## 
## Extremism, lawlessness, violence.
## 
## Disease, joblessness, hopelessness.
## 
## With unity we can do great things. Important things.
## 
## We can right wrongs.
## 
## We can put people to work in good jobs.
## 
## We can teach our children in safe schools.
## 
## We can overcome this deadly virus.
## 
## We can reward work, rebuild the middle class, and make health care
## 
## secure for all.
## 
## We can deliver racial justice.
## 
## We can make America, once again, the leading force for good in the world.
## 
## I know speaking of unity can sound to some like a foolish fantasy.
## 
## I know the forces that divide us are deep and they are real.
## 
## But I also know they are not new.
## 
## Our history has been a constant struggle between the American ideal that we are all created equal and the harsh, ugly reality that racism, nativism, fear, and demonization have long torn us apart.
## 
## The battle is perennial.
## 
## Victory is never assured.
## 
## Through the Civil War, the Great Depression, World War, 9/11, through struggle, sacrifice, and setbacks, our "better angels" have always prevailed.
## 
## In each of these moments, enough of us came together to carry all of us forward.
## 
## And, we can do so now.
## 
## History, faith, and reason show the way, the way of unity.
## 
## We can see each other not as adversaries but as neighbors.
## 
## We can treat each other with dignity and respect.
## 
## We can join forces, stop the shouting, and lower the temperature.
## 
## For without unity, there is no peace, only bitterness and fury.
## 
## No progress, only exhausting outrage.
## 
## No nation, only a state of chaos.
## 
## This is our historic moment of crisis and challenge, and unity is the path forward.
## 
## And, we must meet this moment as the United States of America.
## 
## If we do that, I guarantee you, we will not fail.
## 
## We have never, ever, ever failed in America when we have acted together.
## 
## And so today, at this time and in this place, let us start afresh.
## 
## All of us.
## 
## Let us listen to one another.
## 
## Hear one another.
## 
## See one another.
## 
## Show respect to one another.
## 
## Politics need not be a raging fire destroying everything in its path.
## 
## Every disagreement doesn't have to be a cause for total war.
## 
## And, we must reject a culture in which facts themselves are manipulated and even manufactured.
## 
## My fellow Americans, we have to be different than this.
## 
## America has to be better than this.
## 
## And, I believe America is better than this.
## 
## Just look around.
## 
## Here we stand, in the shadow of a Capitol dome that was completed amid the Civil War, when the Union itself hung in the balance.
## 
## Yet we endured and we prevailed.
## 
## Here we stand looking out to the great Mall where Dr. King spoke of his dream.
## 
## Here we stand, where 108 years ago at another inaugural, thousands of protestors tried to block brave women from marching for the right to vote.
## 
## Today, we mark the swearing-in of the first woman in American history elected to national office – Vice President Kamala Harris.
## 
## Don't tell me things can't change.
## 
## Here we stand across the Potomac from Arlington National Cemetery, where heroes who gave the last full measure of devotion rest in eternal peace.
## 
## And here we stand, just days after a riotous mob thought they could use violence to silence the will of the people, to stop the work of our democracy, and to drive us from this sacred ground.
## 
## That did not happen.
## 
## It will never happen.
## 
## Not today.
## 
## Not tomorrow.
## 
## Not ever.
## 
## To all those who supported our campaign I am humbled by the faith you have placed in us.
## 
## To all those who did not support us, let me say this: Hear me out as we move forward. Take a measure of me and my heart.
## 
## And if you still disagree, so be it.
## 
## That's democracy. That's America. The right to dissent peaceably, within the guardrails of our Republic, is perhaps our nation's greatest strength.
## 
## Yet hear me clearly: Disagreement must not lead to disunion.
## 
## And I pledge this to you: I will be a President for all Americans.
## 
## I will fight as hard for those who did not support me as for those who did.
## 
## Many centuries ago, Saint Augustine, a saint of my church, wrote that a people was a multitude defined by the common objects of their love.
## 
## What are the common objects we love that define us as Americans?
## 
## I think I know.
## 
## Opportunity.
## 
## Security.
## 
## Liberty.
## 
## Dignity.
## 
## Respect.
## 
## Honor.
## 
## And, yes, the truth.
## 
## Recent weeks and months have taught us a painful lesson.
## 
## There is truth and there are lies.
## 
## Lies told for power and for profit.
## 
## And each of us has a duty and responsibility, as citizens, as Americans, and especially as leaders – leaders who have pledged to honor our Constitution and protect our nation — to defend the truth and to defeat the lies.
## 
## I understand that many Americans view the future with some fear and trepidation.
## 
## I understand they worry about their jobs, about taking care of their families, about what comes next.
## 
## I get it.
## 
## But the answer is not to turn inward, to retreat into competing factions, distrusting those who don't look like you do, or worship the way you do, or don't get their news from the same sources you do.
## 
## We must end this uncivil war that pits red against blue, rural versus urban, conservative versus liberal.
## 
## We can do this if we open our souls instead of hardening our hearts.
## 
## If we show a little tolerance and humility.
## 
## If we're willing to stand in the other person's shoes just for a moment.
## 
## Because here is the thing about life: There is no accounting for what fate will deal you.
## 
## There are some days when we need a hand.
## 
## There are other days when we're called on to lend one.
## 
## That is how we must be with one another.
## 
## And, if we are this way, our country will be stronger, more prosperous, more ready for the future.
## 
## My fellow Americans, in the work ahead of us, we will need each other.
## 
## We will need all our strength to persevere through this dark winter.
## 
## We are entering what may well be the toughest and deadliest period of the virus.
## 
## We must set aside the politics and finally face this pandemic as one nation.
## 
## I promise you this: as the Bible says weeping may endure for a night but joy cometh in the morning.
## 
## We will get through this, together
## 
## The world is watching today.
## 
## So here is my message to those beyond our borders: America has been tested and we have come out stronger for it.
## 
## We will repair our alliances and engage with the world once again.
## 
## Not to meet yesterday's challenges, but today's and tomorrow's.
## 
## We will lead not merely by the example of our power but by the power of our example.
## 
## We will be a strong and trusted partner for peace, progress, and security.
## 
## We have been through so much in this nation.
## 
## And, in my first act as President, I would like to ask you to join me in a moment of silent prayer to remember all those we lost this past year to the pandemic.
## 
## To those 400,000 fellow Americans – mothers and fathers, husbands and wives, sons and daughters, friends, neighbors, and co-workers.
## 
## We will honor them by becoming the people and nation we know we can and should be.
## 
## Let us say a silent prayer for those who lost their lives, for those they left behind, and for our country.
## 
## Amen.
## 
## This is a time of testing.
## 
## We face an attack on democracy and on truth.
## 
## A raging virus.
## 
## Growing inequity.
## 
## The sting of systemic racism.
## 
## A climate in crisis.
## 
## America's role in the world.
## 
## Any one of these would be enough to challenge us in profound ways.
## 
## But the fact is we face them all at once, presenting this nation with the gravest of responsibilities.
## 
## Now we must step up.
## 
## All of us.
## 
## It is a time for boldness, for there is so much to do.
## 
## And, this is certain.
## 
## We will be judged, you and I, for how we resolve the cascading crises of our era.
## 
## Will we rise to the occasion?
## 
## Will we master this rare and difficult hour?
## 
## Will we meet our obligations and pass along a new and better world for our children?
## 
## I believe we must and I believe we will.
## 
## And when we do, we will write the next chapter in the American story.
## 
## It's a story that might sound something like a song that means a lot to me.
## 
## It's called "American Anthem" and there is one verse stands out for me:
## 
## "The work and prayers
## 
## of centuries have brought us to this day
## 
## What shall be our legacy?
## 
## What will our children say?…
## 
## Let me know in my heart
## 
## When my days are through
## 
## America
## 
## America
## 
## I gave my best to you."
## 
## Let us add our own work and prayers to the unfolding story of our nation.
## 
## If we do this then when our days are through our children and our children's children will say of us they gave their best.
## 
## They did their duty.
## 
## They healed a broken land.
## 
## My fellow Americans, I close today where I began, with a sacred oath.
## 
## Before God and all of you I give you my word.
## 
## I will always level with you.
## 
## I will defend the Constitution.
## 
## I will defend our democracy.
## 
## I will defend America.
## 
## I will give my all in your service thinking not of power, but of possibilities.
## 
## Not of personal interest, but of the public good.
## 
## And together, we shall write an American story of hope, not fear.
## 
## Of unity, not division.
## 
## Of light, not darkness.
## 
## An American story of decency and dignity.
## 
## Of love and of healing.
## 
## Of greatness and of goodness.
## 
## May this be the story that guides us.
## 
## The story that inspires us.
## 
## The story that tells ages yet to come that we answered the call of history.
## 
## We met the moment.
## 
## That democracy and hope, truth and justice, did not die on our watch but thrived.
## 
## That our America secured liberty at home and stood once again as a beacon to the world.
## 
## That is what we owe our forebearers, one another, and generations to follow.
## 
## So, with purpose and resolve we turn to the tasks of our time.
## 
## Sustained by faith.
## 
## Driven by conviction.
## 
## And, devoted to one another and to this country we love with all our hearts.
## 
## May God bless America and may God protect our troops.
## 
## Thank you, America.
## subsetting by name
## pull out Washington's 1789 speech for comparison and print its text
cat(txt[["1789-Washington"]])
## Fellow-Citizens of the Senate and of the House of Representatives:
## 
## Among the vicissitudes incident to life no event could have filled me with greater anxieties than that of which the notification was transmitted by your order, and received on the 14th day of the present month. On the one hand, I was summoned by my Country, whose voice I can never hear but with veneration and love, from a retreat which I had chosen with the fondest predilection, and, in my flattering hopes, with an immutable decision, as the asylum of my declining years  -  a retreat which was rendered every day more necessary as well as more dear to me by the addition of habit to inclination, and of frequent interruptions in my health to the gradual waste committed on it by time. On the other hand, the magnitude and difficulty of the trust to which the voice of my country called me, being sufficient to awaken in the wisest and most experienced of her citizens a distrustful scrutiny into his qualifications, could not but overwhelm with despondence one who (inheriting inferior endowments from nature and unpracticed in the duties of civil administration) ought to be peculiarly conscious of his own deficiencies. In this conflict of emotions all I dare aver is that it has been my faithful study to collect my duty from a just appreciation of every circumstance by which it might be affected. All I dare hope is that if, in executing this task, I have been too much swayed by a grateful remembrance of former instances, or by an affectionate sensibility to this transcendent proof of the confidence of my fellow citizens, and have thence too little consulted my incapacity as well as disinclination for the weighty and untried cares before me, my error will be palliated by the motives which mislead me, and its consequences be judged by my country with some share of the partiality in which they originated.
## 
## Such being the impressions under which I have, in obedience to the public summons, repaired to the present station, it would be peculiarly improper to omit in this first official act my fervent supplications to that Almighty Being who rules over the universe, who presides in the councils of nations, and whose providential aids can supply every human defect, that His benediction may consecrate to the liberties and happiness of the people of the United States a Government instituted by themselves for these essential purposes, and may enable every instrument employed in its administration to execute with success the functions allotted to his charge. In tendering this homage to the Great Author of every public and private good, I assure myself that it expresses your sentiments not less than my own, nor those of my fellow citizens at large less than either. No people can be bound to acknowledge and adore the Invisible Hand which conducts the affairs of men more than those of the United States. Every step by which they have advanced to the character of an independent nation seems to have been distinguished by some token of providential agency; and in the important revolution just accomplished in the system of their united government the tranquil deliberations and voluntary consent of so many distinct communities from which the event has resulted can not be compared with the means by which most governments have been established without some return of pious gratitude, along with an humble anticipation of the future blessings which the past seem to presage. These reflections, arising out of the present crisis, have forced themselves too strongly on my mind to be suppressed. You will join with me, I trust, in thinking that there are none under the influence of which the proceedings of a new and free government can more auspiciously commence.
## 
## By the article establishing the executive department it is made the duty of the President "to recommend to your consideration such measures as he shall judge necessary and expedient." The circumstances under which I now meet you will acquit me from entering into that subject further than to refer to the great constitutional charter under which you are assembled, and which, in defining your powers, designates the objects to which your attention is to be given. It will be more consistent with those circumstances, and far more congenial with the feelings which actuate me, to substitute, in place of a recommendation of particular measures, the tribute that is due to the talents, the rectitude, and the patriotism which adorn the characters selected to devise and adopt them. In these honorable qualifications I behold the surest pledges that as on one side no local prejudices or attachments, no separate views nor party animosities, will misdirect the comprehensive and equal eye which ought to watch over this great assemblage of communities and interests, so, on another, that the foundation of our national policy will be laid in the pure and immutable principles of private morality, and the preeminence of free government be exemplified by all the attributes which can win the affections of its citizens and command the respect of the world. 
I dwell on this prospect with every satisfaction which an ardent love for my country can inspire, since there is no truth more thoroughly established than that there exists in the economy and course of nature an indissoluble union between virtue and happiness; between duty and advantage; between the genuine maxims of an honest and magnanimous policy and the solid rewards of public prosperity and felicity; since we ought to be no less persuaded that the propitious smiles of Heaven can never be expected on a nation that disregards the eternal rules of order and right which Heaven itself has ordained; and since the preservation of the sacred fire of liberty and the destiny of the republican model of government are justly considered, perhaps, as deeply, as finally, staked on the experiment entrusted to the hands of the American people.
## 
## Besides the ordinary objects submitted to your care, it will remain with your judgment to decide how far an exercise of the occasional power delegated by the fifth article of the Constitution is rendered expedient at the present juncture by the nature of objections which have been urged against the system, or by the degree of inquietude which has given birth to them. Instead of undertaking particular recommendations on this subject, in which I could be guided by no lights derived from official opportunities, I shall again give way to my entire confidence in your discernment and pursuit of the public good; for I assure myself that whilst you carefully avoid every alteration which might endanger the benefits of an united and effective government, or which ought to await the future lessons of experience, a reverence for the characteristic rights of freemen and a regard for the public harmony will sufficiently influence your deliberations on the question how far the former can be impregnably fortified or the latter be safely and advantageously promoted.
## 
## To the foregoing observations I have one to add, which will be most properly addressed to the House of Representatives. It concerns myself, and will therefore be as brief as possible. When I was first honored with a call into the service of my country, then on the eve of an arduous struggle for its liberties, the light in which I contemplated my duty required that I should renounce every pecuniary compensation. From this resolution I have in no instance departed; and being still under the impressions which produced it, I must decline as inapplicable to myself any share in the personal emoluments which may be indispensably included in a permanent provision for the executive department, and must accordingly pray that the pecuniary estimates for the station in which I am placed may during my continuance in it be limited to such actual expenditures as the public good may be thought to require.
## 
## Having thus imparted to you my sentiments as they have been awakened by the occasion which brings us together, I shall take my present leave; but not without resorting once more to the benign Parent of the Human Race in humble supplication that, since He has been pleased to favor the American people with opportunities for deliberating in perfect tranquillity, and dispositions for deciding with unparalleled unanimity on a form of government for the security of their union and the advancement of their happiness, so His divine blessing may be equally conspicuous in the enlarged views, the temperate consultations, and the wise measures on which the success of this Government must depend.

> Exercise

Task

Extract both the Trump and the Biden speeches at the same time. Hint: | means OR in regex functions such as grepl.

Solution
## "|" is the regex OR: keep every document whose name matches either president
l <- txt[grepl(pattern = "Biden|Trump", x = names(txt))]
l
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        2017-Trump 
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               "Chief Justice Roberts, President Carter, President Clinton, President Bush, President Obama, fellow Americans, and people of the world: thank you.\n\nWe, the citizens of America, are now joined in a great national effort to rebuild our country and restore its promise for all of our people.\n\nTogether, we will determine the course of America and the world for many, many years to come.\n\nWe will face challenges. We will confront hardships. But we will get the job done.\n\nEvery four years, we gather on these steps to carry out the orderly and peaceful transfer of power, and we are grateful to President Obama and First Lady Michelle Obama for their gracious aid throughout this transition. They have been magnificent. Thank you.\n\nToday's ceremony, however, has very special meaning. 
Because today we are not merely transferring power from one Administration to another, or from one party to another - but we are transferring power from Washington DC and giving it back to you, the people.\n\nFor too long, a small group in our nation's Capital has reaped the rewards of government while the people have borne the cost.\n\nWashington flourished - but the people did not share in its wealth.\n\nPoliticians prospered - but the jobs left, and the factories closed.\n\nThe establishment protected itself, but not the citizens of our country.\n\nTheir victories have not been your victories; their triumphs have not been your triumphs; and while they celebrated in our nation's capital, there was little to celebrate for struggling families all across our land.\n\nThat all changes - starting right here, and right now, because this moment is your moment: it belongs to you.\n\nIt belongs to everyone gathered here today and everyone watching all across America.\n\nThis is your day. This is your celebration.\n\nAnd this, the United States of America, is your country.\n\nWhat truly matters is not which party controls our government, but whether our government is controlled by the people.\n\nJanuary 20, 2017, will be remembered as the day the people became the rulers of this nation again.\n\nThe forgotten men and women of our country will be forgotten no longer.\n\nEveryone is listening to you now.\n\nYou came by the tens of millions to become part of a historic movement the likes of which the world has never seen before.\n\nAt the center of this movement is a crucial conviction: that a nation exists to serve its citizens.\n\nAmericans want great schools for their children, safe neighborhoods for their families, and good jobs for themselves.\n\nThese are just and reasonable demands of righteous people and a righteous public.\n\nBut for too many of our citizens, a different reality exists: mothers and children trapped in poverty in our inner cities; rusted-out factories 
scattered like tombstones across the landscape of our nation; an education system, flush with cash, but which leaves our young and beautiful students deprived of all knowledge; and the crime and the gangs and the drugs that have stolen too many lives and robbed our country of so much unrealized potential.\n\nThis American carnage stops right here and stops right now.\n\nWe are one nation - and their pain is our pain. Their dreams are our dreams; and their success will be our success. We share one heart, one home, and one glorious destiny.\n\nThe oath of office I take today is an oath of allegiance to all Americans.\n\nFor many decades, we've enriched foreign industry at the expense of American industry; subsidized the armies of other countries while allowing for the very sad depletion of our military; we've defended other nations' borders while refusing to defend our own; and spent trillions and trillions of dollars overseas while America's infrastructure has fallen into disrepair and decay.\n\nWe've made other countries rich while the wealth, strength, and confidence of our country has dissipated over the horizon.\n\nOne by one, the factories shuttered and left our shores, with not even a thought about the millions and millions of American workers that were left behind.\n\nThe wealth of our middle class has been ripped from their homes and then redistributed all across the world.\n\nBut that is the past. 
And now we are looking only to the future.\n\nWe assembled here today are issuing a new decree to be heard in every city, in every foreign capital, and in every hall of power.\n\nFrom this day forward, a new vision will govern our land.\n\nFrom this day forward, it's going to be only America first, America first.\n\nEvery decision on trade, on taxes, on immigration, on foreign affairs, will be made to benefit American workers and American families.\n\nWe must protect our borders from the ravages of other countries making our products, stealing our companies, and destroying our jobs. Protection will lead to great prosperity and strength.\n\nI will fight for you with every breath in my body - and I will never, ever let you down.\n\nAmerica will start winning again, winning like never before.\n\nWe will bring back our jobs. We will bring back our borders. We will bring back our wealth. And we will bring back our dreams.\n\nWe will build new roads, and highways, and bridges, and airports, and tunnels, and railways all across our wonderful nation.\n\nWe will get our people off of welfare and back to work - rebuilding our country with American hands and American labor.\n\nWe will follow two simple rules: buy American and hire American.\n\nWe will seek friendship and goodwill with the nations of the world - but we do so with the understanding that it is the right of all nations to put their own interests first.\n\nWe do not seek to impose our way of life on anyone, but rather to let it shine as an example for everyone to follow.\n\nWe will reinforce old alliances and form new ones - and unite the civilized world against radical Islamic terrorism, which we will eradicate from the face of the Earth.\n\nAt the bedrock of our politics will be a total allegiance to the United States of America, and through our loyalty to our country, we will rediscover our loyalty to each other.\n\nWhen you open your heart to patriotism, there is no room for prejudice.\n\nThe Bible tells us: 
\"How good and pleasant it is when God's people live together in unity.\"\n\nWe must speak our minds openly, debate our disagreements honestly, but always pursue solidarity.\n\nWhen America is united, America is totally unstoppable.\n\nThere should be no fear - we are protected, and we will always be protected.\n\nWe will be protected by the great men and women of our military and law enforcement and, most importantly, we are protected by God.\n\nFinally, we must think big and dream even bigger.\n\nIn America, we understand that a nation is only living as long as it is striving.\n\nWe will no longer accept politicians who are all talk and no action - constantly complaining but never doing anything about it.\n\nThe time for empty talk is over.\n\nNow arrives the hour of action.\n\nDo not let anyone tell you it cannot be done. No challenge can match the heart and fight and spirit of America.\n\nWe will not fail. Our country will thrive and prosper again.\n\nWe stand at the birth of a new millennium, ready to unlock the mysteries of space, to free the Earth from the miseries of disease, and to harness the energies, industries and technologies of tomorrow.\n\nA new national pride will stir ourselves, lift our sights, and heal our divisions.\n\nIt is time to remember that old wisdom our soldiers will never forget: that whether we are black or brown or white, we all bleed the same red blood of patriots, we all enjoy the same glorious freedoms, and we all salute the same great American Flag.\n\nAnd whether a child is born in the urban sprawl of Detroit or the windswept plains of Nebraska, they look up at the same night sky, they fill their heart with the same dreams, and they are infused with the breath of life by the same almighty Creator.\n\nSo to all Americans, in every city near and far, small and large, from mountain to mountain, and from ocean to ocean, hear these words:\n\nYou will never be ignored again.\n\nYour voice, your hopes, and your dreams, will define our 
American destiny. And your courage and goodness and love will forever guide us along the way.\n\nTogether, we will make America strong again.\n\nWe will make America wealthy again.\n\nWe will make America proud again.\n\nWe will make America safe again.\n\nAnd, yes, together, we will make America great again. Thank you, God bless you, and God bless America." 
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        2021-Biden 
## "Chief Justice Roberts, Vice President Harris, Speaker Pelosi, Leader Schumer, Leader McConnell, Vice President Pence, distinguished guests, and my fellow Americans.\n\nThis is America's day.\n\nThis is democracy's day.\n\nA day of history and hope.\n\nOf renewal and resolve.\n\nThrough a crucible for the ages America has been tested anew and America has risen to the challenge.\n\nToday, we celebrate the triumph not of a candidate, but of a cause, the cause of democracy.\n\nThe will of the people has been heard and the will of the people has been heeded.\n\nWe have learned again that democracy is precious.\n\nDemocracy is fragile.\n\nAnd at this hour, my friends, democracy has prevailed.\n\nSo now, on this hallowed ground where just days ago violence sought to shake this Capitol's very foundation, we come together as one nation, under God, indivisible, to carry out the peaceful transfer of power as we have for more than two centuries.\n\nWe look ahead in our uniquely American way – restless, bold, optimistic – and set our sights on the nation we know we can be and we must be.\n\nI thank my predecessors of both parties for their presence here.\n\nI thank them from the bottom of my heart.\n\nYou know the resilience of our Constitution and the strength of our nation.\n\nAs does President Carter, who I spoke to last night but who cannot be with us today, but whom we salute for his lifetime of service.\n\nI have just taken the sacred oath each of these patriots took — an oath first sworn by George Washington.\n\nBut the American story depends not on any one of us, not on some of us, but on all of us.\n\nOn \"We the People\" who seek a more perfect Union.\n\nThis is a great nation and we are a good people.\n\nOver the centuries through storm and strife, in peace and in war, we have come so far. 
But we still have far to go.\n\nWe will press forward with speed and urgency, for we have much to do in this winter of peril and possibility.\n\nMuch to repair.\n\nMuch to restore.\n\nMuch to heal.\n\nMuch to build.\n\nAnd much to gain.\n\nFew periods in our nation's history have been more challenging or difficult than the one we're in now.\n\nA once-in-a-century virus silently stalks the country.\n\nIt's taken as many lives in one year as America lost in all of World War II.\n\nMillions of jobs have been lost.\n\nHundreds of thousands of businesses closed.\n\nA cry for racial justice some 400 years in the making moves us. The dream of justice for all will be deferred no longer.\n\nA cry for survival comes from the planet itself. A cry that can't be any more desperate or any more clear.\n\nAnd now, a rise in political extremism, white supremacy, domestic terrorism that we must confront and we will defeat.\n\nTo overcome these challenges – to restore the soul and to secure the future of America – requires more than words.\n\nIt requires that most elusive of things in a democracy:\n\nUnity.\n\nUnity.\n\nIn another January in Washington, on New Year's Day 1863, Abraham Lincoln signed the Emancipation Proclamation.\n\nWhen he put pen to paper, the President said, \"If my name ever goes down into history it will be for this act and my whole soul is in it.\"\n\nMy whole soul is in it.\n\nToday, on this January day, my whole soul is in this:\n\nBringing America together.\n\nUniting our people.\n\nAnd uniting our nation.\n\nI ask every American to join me in this cause.\n\nUniting to fight the common foes we face:\n\nAnger, resentment, hatred.\n\nExtremism, lawlessness, violence.\n\nDisease, joblessness, hopelessness.\n\nWith unity we can do great things. 
Important things.\n\nWe can right wrongs.\n\nWe can put people to work in good jobs.\n\nWe can teach our children in safe schools.\n\nWe can overcome this deadly virus.\n\nWe can reward work, rebuild the middle class, and make health care\n\nsecure for all.\n\nWe can deliver racial justice.\n\nWe can make America, once again, the leading force for good in the world.\n\nI know speaking of unity can sound to some like a foolish fantasy.\n\nI know the forces that divide us are deep and they are real.\n\nBut I also know they are not new.\n\nOur history has been a constant struggle between the American ideal that we are all created equal and the harsh, ugly reality that racism, nativism, fear, and demonization have long torn us apart.\n\nThe battle is perennial.\n\nVictory is never assured.\n\nThrough the Civil War, the Great Depression, World War, 9/11, through struggle, sacrifice, and setbacks, our \"better angels\" have always prevailed.\n\nIn each of these moments, enough of us came together to carry all of us forward.\n\nAnd, we can do so now.\n\nHistory, faith, and reason show the way, the way of unity.\n\nWe can see each other not as adversaries but as neighbors.\n\nWe can treat each other with dignity and respect.\n\nWe can join forces, stop the shouting, and lower the temperature.\n\nFor without unity, there is no peace, only bitterness and fury.\n\nNo progress, only exhausting outrage.\n\nNo nation, only a state of chaos.\n\nThis is our historic moment of crisis and challenge, and unity is the path forward.\n\nAnd, we must meet this moment as the United States of America.\n\nIf we do that, I guarantee you, we will not fail.\n\nWe have never, ever, ever failed in America when we have acted together.\n\nAnd so today, at this time and in this place, let us start afresh.\n\nAll of us.\n\nLet us listen to one another.\n\nHear one another.\n\nSee one another.\n\nShow respect to one another.\n\nPolitics need not be a raging fire destroying everything in its 
path.\n\nEvery disagreement doesn't have to be a cause for total war.\n\nAnd, we must reject a culture in which facts themselves are manipulated and even manufactured.\n\nMy fellow Americans, we have to be different than this.\n\nAmerica has to be better than this.\n\nAnd, I believe America is better than this.\n\nJust look around.\n\nHere we stand, in the shadow of a Capitol dome that was completed amid the Civil War, when the Union itself hung in the balance.\n\nYet we endured and we prevailed.\n\nHere we stand looking out to the great Mall where Dr. King spoke of his dream.\n\nHere we stand, where 108 years ago at another inaugural, thousands of protestors tried to block brave women from marching for the right to vote.\n\nToday, we mark the swearing-in of the first woman in American history elected to national office – Vice President Kamala Harris.\n\nDon't tell me things can't change.\n\nHere we stand across the Potomac from Arlington National Cemetery, where heroes who gave the last full measure of devotion rest in eternal peace.\n\nAnd here we stand, just days after a riotous mob thought they could use violence to silence the will of the people, to stop the work of our democracy, and to drive us from this sacred ground.\n\nThat did not happen.\n\nIt will never happen.\n\nNot today.\n\nNot tomorrow.\n\nNot ever.\n\nTo all those who supported our campaign I am humbled by the faith you have placed in us.\n\nTo all those who did not support us, let me say this: Hear me out as we move forward. Take a measure of me and my heart.\n\nAnd if you still disagree, so be it.\n\nThat's democracy. That's America. 
The right to dissent peaceably, within the guardrails of our Republic, is perhaps our nation's greatest strength.\n\nYet hear me clearly: Disagreement must not lead to disunion.\n\nAnd I pledge this to you: I will be a President for all Americans.\n\nI will fight as hard for those who did not support me as for those who did.\n\nMany centuries ago, Saint Augustine, a saint of my church, wrote that a people was a multitude defined by the common objects of their love.\n\nWhat are the common objects we love that define us as Americans?\n\nI think I know.\n\nOpportunity.\n\nSecurity.\n\nLiberty.\n\nDignity.\n\nRespect.\n\nHonor.\n\nAnd, yes, the truth.\n\nRecent weeks and months have taught us a painful lesson.\n\nThere is truth and there are lies.\n\nLies told for power and for profit.\n\nAnd each of us has a duty and responsibility, as citizens, as Americans, and especially as leaders – leaders who have pledged to honor our Constitution and protect our nation — to defend the truth and to defeat the lies.\n\nI understand that many Americans view the future with some fear and trepidation.\n\nI understand they worry about their jobs, about taking care of their families, about what comes next.\n\nI get it.\n\nBut the answer is not to turn inward, to retreat into competing factions, distrusting those who don't look like you do, or worship the way you do, or don't get their news from the same sources you do.\n\nWe must end this uncivil war that pits red against blue, rural versus urban, conservative versus liberal.\n\nWe can do this if we open our souls instead of hardening our hearts.\n\nIf we show a little tolerance and humility.\n\nIf we're willing to stand in the other person's shoes just for a moment.\n\nBecause here is the thing about life: There is no accounting for what fate will deal you.\n\nThere are some days when we need a hand.\n\nThere are other days when we're called on to lend one.\n\nThat is how we must be with one another.\n\nAnd, if we are this way, our 
country will be stronger, more prosperous, more ready for the future.\n\nMy fellow Americans, in the work ahead of us, we will need each other.\n\nWe will need all our strength to persevere through this dark winter.\n\nWe are entering what may well be the toughest and deadliest period of the virus.\n\nWe must set aside the politics and finally face this pandemic as one nation.\n\nI promise you this: as the Bible says weeping may endure for a night but joy cometh in the morning.\n\nWe will get through this, together\n\nThe world is watching today.\n\nSo here is my message to those beyond our borders: America has been tested and we have come out stronger for it.\n\nWe will repair our alliances and engage with the world once again.\n\nNot to meet yesterday's challenges, but today's and tomorrow's.\n\nWe will lead not merely by the example of our power but by the power of our example.\n\nWe will be a strong and trusted partner for peace, progress, and security.\n\nWe have been through so much in this nation.\n\nAnd, in my first act as President, I would like to ask you to join me in a moment of silent prayer to remember all those we lost this past year to the pandemic.\n\nTo those 400,000 fellow Americans – mothers and fathers, husbands and wives, sons and daughters, friends, neighbors, and co-workers.\n\nWe will honor them by becoming the people and nation we know we can and should be.\n\nLet us say a silent prayer for those who lost their lives, for those they left behind, and for our country.\n\nAmen.\n\nThis is a time of testing.\n\nWe face an attack on democracy and on truth.\n\nA raging virus.\n\nGrowing inequity.\n\nThe sting of systemic racism.\n\nA climate in crisis.\n\nAmerica's role in the world.\n\nAny one of these would be enough to challenge us in profound ways.\n\nBut the fact is we face them all at once, presenting this nation with the gravest of responsibilities.\n\nNow we must step up.\n\nAll of us.\n\nIt is a time for boldness, for there is so much 
to do.\n\nAnd, this is certain.\n\nWe will be judged, you and I, for how we resolve the cascading crises of our era.\n\nWill we rise to the occasion?\n\nWill we master this rare and difficult hour?\n\nWill we meet our obligations and pass along a new and better world for our children?\n\nI believe we must and I believe we will.\n\nAnd when we do, we will write the next chapter in the American story.\n\nIt's a story that might sound something like a song that means a lot to me.\n\nIt's called \"American Anthem\" and there is one verse stands out for me:\n\n\"The work and prayers\n\nof centuries have brought us to this day\n\nWhat shall be our legacy?\n\nWhat will our children say?…\n\nLet me know in my heart\n\nWhen my days are through\n\nAmerica\n\nAmerica\n\nI gave my best to you.\"\n\nLet us add our own work and prayers to the unfolding story of our nation.\n\nIf we do this then when our days are through our children and our children's children will say of us they gave their best.\n\nThey did their duty.\n\nThey healed a broken land.\n\nMy fellow Americans, I close today where I began, with a sacred oath.\n\nBefore God and all of you I give you my word.\n\nI will always level with you.\n\nI will defend the Constitution.\n\nI will defend our democracy.\n\nI will defend America.\n\nI will give my all in your service thinking not of power, but of possibilities.\n\nNot of personal interest, but of the public good.\n\nAnd together, we shall write an American story of hope, not fear.\n\nOf unity, not division.\n\nOf light, not darkness.\n\nAn American story of decency and dignity.\n\nOf love and of healing.\n\nOf greatness and of goodness.\n\nMay this be the story that guides us.\n\nThe story that inspires us.\n\nThe story that tells ages yet to come that we answered the call of history.\n\nWe met the moment.\n\nThat democracy and hope, truth and justice, did not die on our watch but thrived.\n\nThat our America secured liberty at home and stood once again as a beacon 
to the world.\n\nThat is what we owe our forebearers, one another, and generations to follow.\n\nSo, with purpose and resolve we turn to the tasks of our time.\n\nSustained by faith.\n\nDriven by conviction.\n\nAnd, devoted to one another and to this country we love with all our hearts.\n\nMay God bless America and may God protect our troops.\n\nThank you, America."
## compact structure summary of the subset `l`
l %>% str()
##  Named chr [1:2] "Chief Justice Roberts, President Carter, President Clinton, President Bush, President Obama, fellow Americans, "| __truncated__ "Chief Justice Roberts, Vice President Harris, Speaker Pelosi, Leader Schumer, Leader McConnell, Vice President "| __truncated__
##  - attr(*, "names")= chr [1:2] "2017-Trump" "2021-Biden"

Word tokens and the document-term matrix



## word tokenization
## open the help pages of the two functions used in this section
?tokens
?dfm
## split every speech into word tokens, dropping punctuation and symbols;
## padding = FALSE: removed tokens do not leave empty placeholders behind
## (TRUE/FALSE are spelled out because T and F can be reassigned)
toks <- tokens(
  df,
  remove_punct = TRUE,
  remove_symbols = TRUE,
  padding = FALSE
)
## print a preview of the tokens object
toks
## Tokens consisting of 59 documents and 4 docvars.
## 1789-Washington :
##  [1] "Fellow-Citizens" "of"              "the"             "Senate"          "and"             "of"              "the"             "House"           "of"              "Representatives" "Among"           "the"            
## [ ... and 1,418 more ]
## 
## 1793-Washington :
##  [1] "Fellow"   "citizens" "I"        "am"       "again"    "called"   "upon"     "by"       "the"      "voice"    "of"       "my"      
## [ ... and 123 more ]
## 
## 1797-Adams :
##  [1] "When"      "it"        "was"       "first"     "perceived" "in"        "early"     "times"     "that"      "no"        "middle"    "course"   
## [ ... and 2,306 more ]
## 
## 1801-Jefferson :
##  [1] "Friends"   "and"       "Fellow"    "Citizens"  "Called"    "upon"      "to"        "undertake" "the"       "duties"    "of"        "the"      
## [ ... and 1,714 more ]
## 
## 1805-Jefferson :
##  [1] "Proceeding"    "fellow"        "citizens"      "to"            "that"          "qualification" "which"         "the"           "Constitution"  "requires"      "before"        "my"           
## [ ... and 2,154 more ]
## 
## 1809-Madison :
##  [1] "Unwilling" "to"        "depart"    "from"      "examples"  "of"        "the"       "most"      "revered"   "authority" "I"         "avail"    
## [ ... and 1,163 more ]
## 
## [ reached max_ndoc ... 53 more documents ]
## document-term matrix: one row per document, one column per feature
dfx <- toks %>% dfm()
## print a preview of the matrix
dfx
## Document-feature matrix of: 59 documents, 9,422 features (91.89% sparse) and 4 docvars.
##                  features
## docs              fellow-citizens  of the senate and house representatives among vicissitudes incident
##   1789-Washington               1  71 116      1  48     2               2     1            1        1
##   1793-Washington               0  11  13      0   2     0               0     0            0        0
##   1797-Adams                    3 140 163      1 130     0               2     4            0        0
##   1801-Jefferson                2 104 130      0  81     0               0     1            0        0
##   1805-Jefferson                0 101 143      0  93     0               0     7            0        0
##   1809-Madison                  1  69 104      0  43     0               0     0            0        0
## [ reached max_ndoc ... 53 more documents, reached max_nfeat ... 9,412 more features ]

> Exercise

Task

Remove numbers (part of the noise) from the tokens. Hint: Check out the arguments of the tokens() function.

Solution
## solution: same tokenization as before, additionally dropping numbers,
## URLs and separator characters
## (TRUE/FALSE are spelled out because T and F can be reassigned)
toks <- tokens(
  df,
  remove_punct = TRUE,
  remove_symbols = TRUE,
  padding = FALSE,
  remove_numbers = TRUE,
  remove_url = TRUE,
  remove_separators = TRUE
)
## rebuild the document-term matrix from the cleaned tokens
dfx <- dfm(toks)

Top features

## the ten most frequent features, reported separately for each document
dfx %>% topfeatures(n = 10, groups = docnames(dfx))
## $`1789-Washington`
##   the    of   and    to which    in     i    be    my    by 
##   116    71    48    48    36    31    23    23    22    20 
## 
## $`1793-Washington`
##   the    of     i    to    in shall   and    by    my    it 
##    13    11     6     5     3     3     2     2     2     2 
## 
## $`1797-Adams`
## the  of and  to   a  in  it  be  by  if 
## 163 140 130  72  51  47  34  31  30  25 
## 
## $`1801-Jefferson`
##   the    of   and    to which  that    in   our     i     a 
##   130   104    81    61    25    24    24    24    21    21 
## 
## $`1805-Jefferson`
##   the    of   and    to  that    in  with their  them  have 
##   143   101    93    83    37    35    28    28    27    24 
## 
## $`1809-Madison`
##   the    of    to   and    in     a    as which    by     i 
##   104    69    61    43    34    19    15    14    11    11 
## 
## $`1813-Madison`
##   the    of   and    to     a    on   our    in    it which 
##   100    65    44    42    25    22    22    21    18    16 
## 
## $`1817-Monroe`
## the  of  to and  in our   a  it  be  is 
## 275 164 126 122  79  65  61  57  50  41 
## 
## $`1821-Monroe`
##   the    of    to   and    in     a which    it    be   our 
##   360   197   146   141   136    76    66    64    64    60 
## 
## $`1825-Adams`
##  the   of  and   to   in   by have that  our been 
##  304  245  116  101   62   38   36   36   36   29 
## 
## $`1829-Jackson`
##   the    of    to   and    in  that   our     a    be their 
##    92    71    53    49    24    21    18    16    16    16 
## 
## $`1833-Jackson`
##   the    of   and    to    in   our    my     a which   all 
##   101    76    53    46    23    19    18    15    14    14 
## 
## $`1837-VanBuren`
##  the   of  and   to   in that  our    a   it    i 
##  252  198  150  139   76   60   60   59   42   39 
## 
## $`1841-Harrison`
##   the    of    to   and    in  that     a    it which    be 
##   829   604   318   231   173   132   132   111   107   106 
## 
## $`1845-Polk`
##  the   of  and   to  our   in   be    a   it that 
##  397  298  189  184  101   87   76   65   54   47 
## 
## $`1849-Taylor`
##   the    of    to   and    in     i    by    be shall   our 
##    99    62    61    52    20    18    17    16    15    15 
## 
## $`1853-Pierce`
##   the    of   and    to     a    in    be  that which    it 
##   230   169   130   107    62    60    57    46    41    34 
## 
## $`1857-Buchanan`
##  the   of   to  and   in    a this  our   it   is 
##  238  139  105   97   61   58   39   35   32   32 
## 
## $`1861-Lincoln`
##  the   of   to  and   in   be that   it    a   is 
##  256  146  134  105   77   76   59   59   56   49 
## 
## $`1865-Lincoln`
##   the    to   and    of    it  that   war   all which    in 
##    58    27    24    22    13    12    12    10     9     9 
## 
## $`1869-Grant`
##  the   to   of  and   in   be    i    a   it will 
##   83   57   47   27   27   25   19   19   16   16 
## 
## $`1873-Grant`
##  the   of  and   to   in    i   my    a that   be 
##  106   72   50   49   26   25   21   21   20   19 
## 
## $`1877-Hayes`
##  the   of  and   to   in    a that   be   by   as 
##  240  166  102   88   63   41   39   32   26   26 
## 
## $`1881-Garfield`
##  the   of  and   to   in   is that    a   it  our 
##  317  181  119   80   49   37   35   35   35   35 
## 
## $`1885-Cleveland`
##   the    of   and    to    in     a   our their    is    be 
##   174   117   103    57    31    30    26    22    19    18 
## 
## $`1889-Harrison`
##  the   of  and   to   in  our that    a  not   be 
##  360  240  192  133   80   76   66   65   46   45 
## 
## $`1893-Cleveland`
##   the    of   and    to   our    in    be which  that    by 
##   156   119   102    79    46    36    25    23    21    21 
## 
## $`1897-McKinley`
## the  of and  to  in  be our   a  it  is 
## 345 228 171 113  81  65  60  57  56  46 
## 
## $`1901-McKinley`
## the  of and  to  in  we  be  it our for 
## 200 110  97  65  42  28  27  26  25  23 
## 
## $`1905-Roosevelt`
##   the    of   and    we    to    in   our     a which  have 
##    65    45    38    32    28    23    22    20    16    15 
## 
## $`1909-Taft`
## the  of and  to  in   a  be  is  as  it 
## 486 314 220 218 140 109  79  62  58  56 
## 
## $`1913-Wilson`
##  the   of  and   to   we  our   in   it    a have 
##  109   87   78   49   40   30   29   29   27   25 
## 
## $`1917-Wilson`
##  the  and   of   we   to   in  our that have   be 
##   94   77   76   47   46   36   33   29   27   22 
## 
## $`1921-Harding`
## the  of and  to  we our  in for   a  is 
## 200 159 152 104  80  68  63  52  47  47 
## 
## $`1925-Coolidge`
##  the   of  and   to   we    a   in that   is  not 
##  261  207  146  135   88   77   71   65   65   61 
## 
## $`1929-Hoover`
##  the   of  and   to   in  our    a   is  for that 
##  288  250  122  100   83   75   49   48   44   39 
## 
## $`1933-Roosevelt`
##  the   of  and   to   in    a that  our   we   it 
##  130  109   58   50   44   38   32   29   26   25 
## 
## $`1937-Roosevelt`
##   of  the   to  and   we    a that  our   in have 
##  106  106   56   53   47   39   33   33   29   21 
## 
## $`1941-Roosevelt`
##  the   of  and   to   in   we    a   it   is that 
##  114   81   47   36   35   32   31   28   24   23 
## 
## $`1945-Roosevelt`
##  the   we   of  and   to that  our    a   in   it 
##   27   26   25   21   16   14   14   13   11    7 
## 
## $`1949-Truman`
##  the  and   of   to   we   in that    a  our  for 
##  141  100   96   81   59   56   37   36   32   30 
## 
## $`1953-Eisenhower`
##  the   of  and   to   we   in  our that this    a 
##  171  142  101   81   66   65   58   40   37   33 
## 
## $`1957-Eisenhower`
## the  of and  we  to  in our all   a  is 
## 114  96  64  51  44  43  38  26  25  20 
## 
## $`1961-Kennedy`
##  the   of   to  and   we    a   in  our that  not 
##   86   65   43   41   30   29   26   21   20   19 
## 
## $`1965-Johnson`
##  the  and   of   to   in   we    a  our that   is 
##   77   65   57   37   36   34   33   32   27   27 
## 
## $`1969-Nixon`
##  the   of   to   we   in  our that  and   as    a 
##  136   94   69   65   61   47   42   39   34   31 
## 
## $`1973-Nixon`
##  the   of   to   in  and   we    a that  for  our 
##   83   68   65   58   50   47   35   33   32   32 
## 
## $`1977-Carter`
##  the  and   to   we  our   of    a  for that   in 
##   53   48   44   43   35   33   29   24   23   22 
## 
## $`1981-Reagan`
##  the  and   of   to   we  our    a   in that will 
##  122   92   90   80   57   56   46   45   34   33 
## 
## $`1985-Reagan`
## the and  of  to  we   a our  in for  is 
## 130 110  95  73  68  59  55  46  35  33 
## 
## $`1989-Bush`
## the and   a  to  of  we  is our  in are 
## 121  98  73  63  61  60  49  44  38  36 
## 
## $`1993-Clinton`
##   the   and   our    we    to    of    in    is   for world 
##    89    66    57    52    49    46    31    28    20    18 
## 
## $`1997-Clinton`
##  the   of  and   to  our    a   we   in  new that 
##  133   96   94   64   63   59   42   35   29   27 
## 
## $`2001-Bush`
## and  of the our  we   a  to  in  is not 
##  82  58  53  50  47  46  45  31  31  27 
## 
## $`2005-Bush`
##  the   of  and   in  our   to   we   is that    a 
##  142  116  108   51   50   38   37   30   28   27 
## 
## $`2009-Obama`
##  the  and   of   to  our   we that    a   is   in 
##  135  111   82   70   67   62   49   47   36   25 
## 
## $`2013-Obama`
##  the  and  our   of   we   to that    a  for   is 
##  104   89   76   69   68   66   55   37   28   25 
## 
## $`2017-Trump`
##     and     the      of     our      we    will      to      is america       a 
##      77      71      48      47      46      40      36      21      18      15 
## 
## $`2021-Biden`
##  the  and   we   of   to    a  our   in this    i 
##  101   96   88   77   65   46   43   42   39   33
## ugh, not very informative: function words dominate every document
## so let's drop stopwords before the document-term matrix is created;
## the filtering is applied to the tokens object
## first, inspect the built-in English stopword list
stopwords(language = "en")
##   [1] "i"          "me"         "my"         "myself"     "we"         "our"        "ours"       "ourselves"  "you"        "your"       "yours"      "yourself"   "yourselves" "he"         "him"        "his"        "himself"    "she"        "her"        "hers"       "herself"    "it"         "its"        "itself"     "they"       "them"       "their"      "theirs"     "themselves" "what"       "which"      "who"        "whom"       "this"       "that"       "these"      "those"      "am"         "is"         "are"        "was"        "were"       "be"         "been"       "being"      "have"       "has"        "had"        "having"     "do"         "does"       "did"        "doing"      "would"      "should"     "could"      "ought"      "i'm"        "you're"     "he's"       "she's"      "it's"       "we're"      "they're"    "i've"       "you've"     "we've"      "they've"    "i'd"        "you'd"      "he'd"       "she'd"      "we'd"       "they'd"     "i'll"       "you'll"    
##  [77] "he'll"      "she'll"     "we'll"      "they'll"    "isn't"      "aren't"     "wasn't"     "weren't"    "hasn't"     "haven't"    "hadn't"     "doesn't"    "don't"      "didn't"     "won't"      "wouldn't"   "shan't"     "shouldn't"  "can't"      "cannot"     "couldn't"   "mustn't"    "let's"      "that's"     "who's"      "what's"     "here's"     "there's"    "when's"     "where's"    "why's"      "how's"      "a"          "an"         "the"        "and"        "but"        "if"         "or"         "because"    "as"         "until"      "while"      "of"         "at"         "by"         "for"        "with"       "about"      "against"    "between"    "into"       "through"    "during"     "before"     "after"      "above"      "below"      "to"         "from"       "up"         "down"       "in"         "out"        "on"         "off"        "over"       "under"      "again"      "further"    "then"       "once"       "here"       "there"      "when"       "where"     
## [153] "why"        "how"        "all"        "any"        "both"       "each"       "few"        "more"       "most"       "other"      "some"       "such"       "no"         "nor"        "not"        "only"       "own"        "same"       "so"         "than"       "too"        "very"       "will"
# drop every English stopword from the tokens, then rebuild the dfm from them
sel_toks <- tokens_remove(toks, pattern = stopwords("en"))
dfx <- dfm(sel_toks)

> Exercise

Task

Remove additional words that are not part of the stopwords-vector, such as “much”, “every”, and “never”. Hint: Two or more vectors can be concatenated by combining them in a single call to c().

Solution
?tokens_select
# c() concatenates the stopword vector with all three extra words named in the
# task ("much", "every", "never").
# Note: dfx is not rebuilt from sel_toks here, so the dfm used below still
# reflects the plain stopword filter.
sel_toks <- tokens_select(toks,
                          pattern = c(stopwords("en"), "much", "every", "never"),
                          selection = "remove", valuetype = "glob")

Now let’s have a look at it again:

# ten most frequent features per document, computed on the stopword-free dfm
topfeatures(dfx, groups = docnames(dfx), n = 10)
## $`1789-Washington`
##        can      every government     public        may    present    country        one   citizens       duty 
##          9          9          8          6          6          5          5          4          4          4 
## 
## $`1793-Washington`
##          shall            now           oath        present        country          voice         called       citizens administration     confidence 
##              3              2              2              1              1              1              1              1              1              1 
## 
## $`1797-Adams`
##       people   government          may      nations      country          can       states       nation constitution      foreign 
##           20           16           13           11            9            9            9            9            8            8 
## 
## $`1801-Jefferson`
## government         us        may        let        one      shall  principle        man   citizens     fellow 
##         12         10          8          7          6          6          6          6          5          5 
## 
## $`1805-Jefferson`
##   public citizens      may   fellow    state    among    shall       us      can     time 
##       14       10       10        8        8        7        7        7        6        6 
## 
## $`1809-Madison`
##     public    nations        can    country       well     states     rights      peace confidence     united 
##          6          6          5          4          4          4          4          4          3          3 
## 
## $`1813-Madison`
##      war  country    every   united  british   states   nation  without   spirit citizens 
##       15        5        5        5        5        4        4        4        4        3 
## 
## $`1817-Monroe`
##     states government      great     people      every         us     united       just        may      union 
##         21         21         21         15         14         14         13         10         10         10 
## 
## $`1821-Monroe`
##      great     states     united        war        may       made   citizens      every government     people 
##         29         20         16         16         15         15         14         13         12         11 
## 
## $`1825-Adams`
##      union government       upon     rights    country     public      great      peace      first     nation 
##         20         17         16         10          9          9          9          9          8          8 
## 
## $`1829-Jackson`
##     public government      shall        can      power        may     people   national      whose     duties 
##          8          6          6          5          5          4          4          4          3          3 
## 
## $`1833-Jackson`
##   government       people        union       states       powers         upon      general          may       united preservation 
##           13            9            9            8            5            5            5            4            4            4 
## 
## $`1837-VanBuren`
##        every       people institutions   government      country         upon           us          may          can        never 
##           20           20           16           15           13           13           12           11            9            8 
## 
## $`1841-Harrison`
##        power       people   government constitution          may         upon          one          can    executive       states 
##           47           38           36           36           34           34           26           26           25           24 
## 
## $`1845-Polk`
##   government       states        union          one       people       powers constitution      country    interests         upon 
##           45           36           32           19           16           16           15           14           14           14 
## 
## $`1849-Taylor`
##        shall   government      country       duties          may    interests constitution           us     congress          day 
##           15            7            6            5            4            4            4            4            4            3 
## 
## $`1853-Pierce`
##       upon        can      power government      every        may      shall       must         us     states 
##         24         14         11         10          9          9          9          9          9          8 
## 
## $`1857-Buchanan`
##       states        shall constitution          may       people   government        great     question      country       public 
##           22           18           17           15           13           13           11           11            9            9 
## 
## $`1861-Lincoln`
##          can constitution       people        union       states   government        shall          now         upon          one 
##           28           24           20           20           19           18           17           15           15           14 
## 
## $`1865-Lincoln`
##   war shall   god years union   let   may right  must    us 
##    12     5     5     4     4     4     3     3     3     3 
## 
## $`1869-Grant`
## country     now   every  public     may without      us    laws    best  dollar 
##       8       8       6       5       5       5       5       5       5       5 
## 
## $`1873-Grant`
##    country     people       made       best      great     office        one        can government       good 
##          8          7          6          6          5          5          4          4          4          4 
## 
## $`1877-Hayes`
##    country government       upon     public     states  political     people      great      party   citizens 
##         20         15         15         11         11         10          9          9          8          7 
## 
## $`1881-Garfield`
##       people   government       states constitution          can         upon        great        union          law       nation 
##           21           20           15           15           13           13           11           11           10            9 
## 
## $`1885-Cleveland`
##       people   government       public        shall constitution    interests        every     citizens       policy         upon 
##           16           16           11           10            8            7            5            5            5            5 
## 
## $`1889-Harrison`
##       people         upon       states        shall       public         laws          may        great constitution   government 
##           29           21           20           18           17           17           12           12           11           10 
## 
## $`1893-Cleveland`
##     people government         us        can      every     public   american    support   national    service 
##         19         13         10          9          9          8          8          7          6          6 
## 
## $`1897-McKinley`
##       upon     people government       must   congress      great    country        can     public      every 
##         31         25         23         23         18         16         14         13         13         12 
## 
## $`1901-McKinley`
## government     people       upon        now     united     states  executive   congress         us      shall 
##         13         12         11         10          9          9          9          9          8          7 
## 
## $`1905-Roosevelt`
##       us     life   people     must    great   nation problems      men    power    cause 
##       12        6        6        6        5        5        5        4        4        4 
## 
## $`1909-Taft`
## government   business       must        can        may       upon     proper   congress       race        law 
##         26         22         19         18         18         16         15         14         13         13 
## 
## $`1913-Wilson`
##      great government       life      every        men       upon    justice     things     nation      shall 
##         14          9          8          8          8          8          8          7          6          6 
## 
## $`1917-Wilson`
##    upon   shall      us purpose  action    life   world   peace   stand     can 
##      13       9       8       8       7       6       6       6       6       5 
## 
## $`1921-Harding`
##        world         must      america          war        never civilization          can          new        order          may 
##           23           23           15           13           12           12           11           11           10           10 
## 
## $`1925-Coolidge`
##        can    country       must      great     people government      world      peace       much       upon 
##         26         17         17         16         15         14         13         13         12         12 
## 
## $`1929-Hoover`
## government        can       upon   progress     people      world       must      peace    justice     nation 
##         24         17         17         16         15         15         15         15         14         12 
## 
## $`1933-Roosevelt`
##        can   national       must        may     people      shall leadership     helped     nation      world 
##         11          9          9          8          7          7          7          7          6          6 
## 
## $`1937-Roosevelt`
## government     people        can     nation       good        men        see  democracy      power   progress 
##         15         11          9          9          8          8          8          8          7          7 
## 
## $`1941-Roosevelt`
##    nation      know    spirit democracy      life        us    people   america     years   freedom 
##        11        10         9         9         8         8         7         7         6         6 
## 
## $`1945-Roosevelt`
##   shall   peace learned     men   today     can     way    test    life  fellow 
##       7       6       5       4       4       3       3       3       2       2 
## 
## $`1949-Truman`
##  nations    world      can    peace   people  freedom     free   united     must security 
##       22       22       16       14       12       12       11       10        9        9 
## 
## $`1953-Eisenhower`
##    free   world   faith   peace   shall      us  people    must    upon freedom 
##      21      14      13      12      11      11      10      10      10      10 
## 
## $`1957-Eisenhower`
##     may nations   world   peace freedom  people    seek     can    must    upon 
##      15      14      14      11      11      10      10       9       9       6 
## 
## $`1961-Kennedy`
##      let       us      can    world    sides      new   pledge citizens  nations     free 
##       16       12        9        8        8        7        7        5        5        5 
## 
## $`1965-Johnson`
##     us change nation   must people  union    man  world    old  every 
##     12     12     11     10      9      9      9      7      7      6 
## 
## $`1969-Nixon`
##     us    can people  world  peace    let   know    now   make  earth 
##     20     17     14     13     12     11     10      9      9      9 
## 
## $`1973-Nixon`
##             us            let          peace          world            new            can        america responsibility     government          great 
##             26             22             19             16             15             14             13             11             10              9 
## 
## $`1977-Carter`
##      can   nation      new     must       us   people together strength   spirit    human 
##       13       10        9        8        8        7        7        7        6        5 
## 
## $`1981-Reagan`
##         us government       must    believe     people  americans        one       time      world    freedom 
##         25         16         10         10          9          9          8          8          8          8 
## 
## $`1985-Reagan`
##         us     people      world        one government    freedom       must       time        now      human 
##         27         16         15         14         13         13         12         10         10          9 
## 
## $`1989-Bush`
##    new     us    can  great nation  world   free   must   hand   good 
##     14     13     11     10     10     10      9      9      8      8 
## 
## $`1993-Clinton`
##     world      must   america        us    people     today       new       let    change americans 
##        18        18        15        13        12        10         9         9         9         9 
## 
## $`1997-Clinton`
##     new      us century  nation    time   every  people america    land     one 
##      29      27      20      13      12      11      11      11      11      10 
## 
## $`2001-Bush`
##       us  country citizens    story   nation  america      can    every     must    never 
##       11        9        9        9        8        8        6        6        6        5 
## 
## $`2005-Bush`
##   freedom   liberty   america     every       one    nation   country     world americans america's 
##        25        15        12        10         9         9         8         8         8         8 
## 
## $`2009-Obama`
##      us     can  nation     new   every    must america  people    less     let 
##      23      13      12      11       8       8       8       7       7       7 
## 
## $`2013-Obama`
##       us     must   people     time      can    every together     make      one  country 
##       21       17       11       10        7        7        7        7        6        6 
## 
## $`2017-Trump`
##  america american   people  country      one    every    never    great   nation      new 
##       18       11       10        9        8        7        6        6        6        6 
## 
## $`2021-Biden`
##        us   america       can       one    nation      must democracy    people   another  american 
##        27        18        16        15        12        10        10         9         9         9
# topfeatures can be grouped by any document variable: list the docvars,
# then aggregate the counts per party
docvars(dfx)
topfeatures(dfx, groups = Party, n = 10)
## $Democratic
##         us     people        can government       must     nation      world        new      shall      every 
##        222        199        173        143        138        126        118        113        111        109 
## 
## $`Democratic-Republican`
## government      great     states        war        may     public      every         us      union    country 
##         68         61         56         51         49         48         45         44         42         40 
## 
## $Federalist
##       people   government          may      nations      country          can       states       nation constitution      foreign 
##           20           16           13           11            9            9            9            9            8            8 
## 
## $none
##        can      every government        may    present    country     public      shall   citizens     people 
##          9          9          9          7          6          6          6          6          5          5 
## 
## $Republican
##     people government        can         us       must       upon      world      great    country      peace 
##        264        240        228        218        201        192        180        159        147        139 
## 
## $Whig
##   government       states       people        power constitution          may         upon        union          one      country 
##           88           61           57           57           55           51           50           47           45           42

> Exercise

Task

Print topfeatures per President.

Solution
# documents sharing a President value are pooled into one group
# (e.g. all Roosevelt addresses end up under a single entry)
topfeatures(dfx, groups = President, n = 10)
## $Adams
##   government       people        union         upon      country      nations       nation          may constitution       public 
##           33           27           22           21           18           18           17           16           16           15 
## 
## $Biden
##        us   america       can       one    nation      must democracy    people   another  american 
##        27        18        16        15        12        10        10         9         9         9 
## 
## $Buchanan
##       states        shall constitution          may       people   government        great     question      country       public 
##           22           18           17           15           13           13           11           11            9            9 
## 
## $Bush
## freedom  nation      us america     can   world    must country     new    time 
##      36      27      27      27      24      21      21      20      20      18 
## 
## $Carter
##      can   nation      new     must       us   people together strength   spirit    human 
##       13       10        9        8        8        7        7        7        6        5 
## 
## $Cleveland
##     people government     public      every      shall         us  interests   american        can       upon 
##         35         29         19         14         14         14         12         12         11         11 
## 
## $Clinton
##      us     new   world    must america  people century    time  nation     let 
##      40      38      28      28      26      23      21      19      18      18 
## 
## $Coolidge
##        can    country       must      great     people government      world      peace       much       upon 
##         26         17         17         16         15         14         13         13         12         12 
## 
## $Eisenhower
##   world    free   peace nations freedom  people     may    must    upon     can 
##      28      26      23      21      21      20      19      19      16      15 
## 
## $Garfield
##       people   government       states constitution          can         upon        great        union          law       nation 
##           21           20           15           15           13           13           11           11           10            9 
## 
## $Grant
## country     now    best  people  office     one without    upon     can   every 
##      16      11      11       9       9       8       8       8       7       7 
## 
## $Harding
##        world         must      america          war        never civilization          can          new        order          may 
##           23           23           15           13           12           12           11           11           10           10 
## 
## $Harrison
##       people         upon        power constitution          may   government       states        great          can    executive 
##           67           55           53           47           46           46           44           33           31           30 
## 
## $Hayes
##    country government       upon     public     states  political     people      great      party   citizens 
##         20         15         15         11         11         10          9          9          8          7 
## 
## $Hoover
## government        can       upon   progress     people      world       must      peace    justice     nation 
##         24         17         17         16         15         15         15         15         14         12 
## 
## $Jackson
## government     people     public     states      union      shall        can        may      power     powers 
##         19         13         11         10         10          9          8          8          7          6 
## 
## $Jefferson
##     public        may         us   citizens government     fellow      shall      state        can      peace 
##         18         18         17         15         15         13         13         10          9          9 
## 
## $Johnson
##     us change nation   must people  union    man  world    old  every 
##     12     12     11     10      9      9      9      7      7      6 
## 
## $Kennedy
##      let       us      can    world    sides      new   pledge citizens  nations     free 
##       16       12        9        8        8        7        7        5        5        5 
## 
## $Lincoln
##          can        union constitution        shall       people       states   government          now          one          may 
##           28           24           24           22           20           19           19           17           16           16 
## 
## $Madison
##     war country  public  united  states   every nations     can  nation without 
##      16       9       8       8       8       7       7       6       6       6 
## 
## $McKinley
##       upon     people government       must   congress     united     states      great        now     public 
##         42         37         36         27         27         21         21         21         20         19 
## 
## $Monroe
##      great     states government     united      every     people        war        may       made         us 
##         50         41         33         29         27         26         26         25         20         20 
## 
## $Nixon
##         us        let        can      peace      world        new     people    america       make government 
##         46         33         31         31         29         23         20         18         16         15 
## 
## $Obama
##      us    must     can  people  nation     new    time   every america     now 
##      44      25      20      18      18      17      16      15      14      11 
## 
## $Pierce
##       upon        can      power government      every        may      shall       must         us     states 
##         24         14         11         10          9          9          9          9          9          8 
## 
## $Polk
##   government       states        union          one       people       powers constitution      country    interests         upon 
##           45           36           32           19           16           16           15           14           14           14 
## 
## $Reagan
##         us government     people      world        one       must    freedom       time  americans        now 
##         52         29         25         23         22         22         21         18         16         15 
## 
## $Roosevelt
##     people         us     nation        can government       must        men      shall       life  democracy 
##         32         32         31         25         23         23         22         22         21         20 
## 
## $Taft
## government   business       must        can        may       upon     proper   congress       race        law 
##         26         22         19         18         18         16         15         14         13         13 
## 
## $Taylor
##        shall   government      country       duties          may    interests constitution           us     congress          day 
##           15            7            6            5            4            4            4            4            4            3 
## 
## $Truman
##  nations    world      can    peace   people  freedom     free   united     must security 
##       22       22       16       14       12       12       11       10        9        9 
## 
## $Trump
##  america american   people  country      one    every    never    great   nation      new 
##       18       11       10        9        8        7        6        6        6        6 
## 
## $`Van Buren`
##        every       people institutions   government      country         upon           us          may          can        never 
##           20           20           16           15           13           13           12           11            9            8 
## 
## $Washington
##        can      every government        may    present    country     public      shall   citizens     people 
##          9          9          9          7          6          6          6          6          5          5 
## 
## $Wilson
##    upon   great   shall    life      us     men  nation  things justice purpose 
##      21      18      15      14      13      11      11      11      10      10

Level of analysis: sentence

Sometimes we want to analyze certain indicators at the sentence level. To show how this is done, we will compute the per-sentence sentiment in Biden’s 2021 speech.

Reshape, Subset and Prepare Documents

## step 1: keep only Biden's address from the corpus
biden <- corpus_subset(df, President == "Biden")
## step 2: split the full text into one document per sentence
sentences <- corpus_reshape(biden, to = "sentences")
sentences
## Corpus consisting of 216 documents and 4 docvars.
## 2021-Biden.1 :
## "Chief Justice Roberts, Vice President Harris, Speaker Pelosi..."
## 
## 2021-Biden.2 :
## "This is America's day."
## 
## 2021-Biden.3 :
## "This is democracy's day."
## 
## 2021-Biden.4 :
## "A day of history and hope."
## 
## 2021-Biden.5 :
## "Of renewal and resolve."
## 
## 2021-Biden.6 :
## "Through a crucible for the ages America has been tested anew..."
## 
## [ reached max_ndoc ... 210 more documents ]
## 3rd step: within-sentence word tokenization
# split every sentence into word tokens and drop punctuation
# (TRUE rather than T: T is an ordinary variable that can be reassigned)
sentence_toks <- tokens(sentences, what = "word", remove_punct = TRUE)
# make lower case
sentence_toks <- tokens_tolower(sentence_toks)
# remove stopwords
sentence_toks <- tokens_select(sentence_toks, pattern = stopwords("en"),
                               selection = "remove")

Sentiment annotation

## select a sentiment dictionary
## we use the Lexicoder Sentiment Dictionary 2015 (LSD2015), native to quanteda
## NOTE(review): the original note credited "Proksch et al. (2015)"; the
## quanteda documentation appears to cite Young & Soroka (2012) -- confirm
data_dictionary_LSD2015
## Dictionary object with 4 key entries.
## - [negative]:
##   - a lie, abandon*, abas*, abattoir*, abdicat*, aberra*, abhor*, abject*, abnormal*, abolish*, abominab*, abominat*, abrasiv*, absent*, abstrus*, absurd*, abus*, accident*, accost*, accursed* [ ... and 2,838 more ]
## - [positive]:
##   - ability*, abound*, absolv*, absorbent*, absorption*, abundanc*, abundant*, acced*, accentuat*, accept*, accessib*, acclaim*, acclamation*, accolad*, accommodat*, accomplish*, accord, accordan*, accorded*, accords [ ... and 1,689 more ]
## - [neg_positive]:
##   - best not, better not, no damag*, no no, not ability*, not able, not abound*, not absolv*, not absorbent*, not absorption*, not abundanc*, not abundant*, not acced*, not accentuat*, not accept*, not accessib*, not acclaim*, not acclamation*, not accolad*, not accommodat* [ ... and 1,701 more ]
## - [neg_negative]:
##   - not a lie, not abandon*, not abas*, not abattoir*, not abdicat*, not aberra*, not abhor*, not abject*, not abnormal*, not abolish*, not abominab*, not abominat*, not abrasiv*, not absent*, not abstrus*, not absurd*, not abus*, not accident*, not accost*, not accursed* [ ... and 2,840 more ]
## score every sentence of Biden's speech with the dictionary
# only the first two keys (negative, positive) are looked up
toks_lsd <- tokens_lookup(sentence_toks,
                          dictionary = data_dictionary_LSD2015[1:2])
dfm_lsd <- dfm(toks_lsd)
## per-sentence share of each sentiment key over the course of the speech
# wide table: one row per sentence, one count column per key
df_lsd <- convert(dfm_lsd, to = "data.frame")
df_lsd
# long table: one row per sentence/sentiment pair
df_lsd <- melt(df_lsd, id.vars = "doc_id",
               variable.name = "sentiment", value.name = "n")
head(df_lsd)
# within each sentence (doc_id), turn the counts into shares
df_lsd <- mutate(group_by(df_lsd, doc_id), perc = n / sum(n))
head(df_lsd)
# numeric sentence position, taken from the digits after the dot in doc_id
df_lsd <- ungroup(df_lsd)
df_lsd <- mutate(
  df_lsd,
  num_id = as.numeric(stri_extract_first_regex(doc_id, '(?<=\\.)[0-9]+'))
)

Visualization

## smoothed sentiment curves, one per dictionary key, along the speech
ggplot(df_lsd, aes(num_id, perc, group = sentiment, colour = sentiment)) +
  geom_smooth() +
  #geom_point(alpha = 0.5) +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(labels = scales::percent, expand = c(0.01, 0.01)) +
  ggtitle("Biden's 2021 speech: sentiment per sentence, smoothed") +
  xlab("Sentence Number Within Speech") +
  ylab("Frequency") +
  # complete theme first, then the bold-title tweak on top of it
  theme_classic() +
  theme(plot.title = element_text(face = "bold"))

## inspect the token vectors of sentences 55-65
sentence_toks[55:65]
## Tokens consisting of 11 documents and 4 docvars.
## 2021-Biden.55 :
## [1] "can"    "right"  "wrongs"
## 
## 2021-Biden.56 :
## [1] "can"    "put"    "people" "work"   "good"   "jobs"  
## 
## 2021-Biden.57 :
## [1] "can"      "teach"    "children" "safe"     "schools" 
## 
## 2021-Biden.58 :
## [1] "can"      "overcome" "deadly"   "virus"   
## 
## 2021-Biden.59 :
##  [1] "can"     "reward"  "work"    "rebuild" "middle"  "class"   "make"    "health"  "care"    "secure" 
## 
## 2021-Biden.60 :
## [1] "can"     "deliver" "racial"  "justice"
## 
## [ reached max_ndoc ... 5 more documents ]
## hmm, these sentences do not read as if they were meant negatively.
## let's double-check by reading the untokenized sentences
sentences[55:65]
## Corpus consisting of 11 documents and 4 docvars.
## 2021-Biden.55 :
## "We can right wrongs."
## 
## 2021-Biden.56 :
## "We can put people to work in good jobs."
## 
## 2021-Biden.57 :
## "We can teach our children in safe schools."
## 
## 2021-Biden.58 :
## "We can overcome this deadly virus."
## 
## 2021-Biden.59 :
## "We can reward work, rebuild the middle class, and make healt..."
## 
## 2021-Biden.60 :
## "We can deliver racial justice."
## 
## [ reached max_ndoc ... 5 more documents ]
## as expected, Biden is mentioning ISSUES, but in a combative way

Level of analysis: token windows

Let’s compare the word embeddings for ‘progress’, ‘spirit’, ‘world’, ‘nation’, ‘duty’, and ‘war’ between Democrats and Republicans. We define the embedding as a window of +/-10 words around these keywords.

Prep

## tokenization
toks <- tokens(df, remove_punct = TRUE, remove_symbols = TRUE, padding = FALSE)
## protect the abbreviation "US" before lower-casing, so that it does not
## collapse into the pronoun "us" (removed as a custom stopword below).
## tokens_replace() matches whole tokens and defaults to valuetype = "glob"
## with case-insensitive matching, so a regex such as '\\bUS\\b' never matches;
## an exact, case-sensitive match is used instead.
toks <- tokens_replace(toks,
                       pattern = "US",
                       replacement = "USA",
                       valuetype = "fixed",
                       case_insensitive = FALSE)
toks <- tokens_tolower(toks)
## lemmatizing: map each token to its lemma via the lexicon lookup table
toks <- tokens_replace(toks,
                       pattern = lexicon::hash_lemmas$token,
                       replacement = lexicon::hash_lemmas$lemma)
## remove stopwords
# custom stopwords (frequent words that are not in the standard list)
cstmwrds <- c("upon", "can", "us", "let", "may", "make",
              "must", "many", "shall", "without", "among",
              "much", "every", "ever", "know", "new", "never",
              "year", "find", "see", "good")
# remove standard and custom stopwords in one pass
toks <- tokens_select(toks, pattern = c(stopwords("en"), cstmwrds),
                      selection = "remove")
## define the keywords
query <- c("progress", "spirit", "world", "nation", "duty", "war")

Feature co-occurrence matrix for ‘duty’

## split the token object by party affiliation
demo <- tokens_subset(toks, Party == "Democratic")
repub <- tokens_subset(toks, Party == "Republican")
## keep only 'duty' together with the 10 tokens on either side of it
toks_demo <- tokens_select(demo, pattern = "duty", window = 10,
                           selection = "keep", padding = FALSE,
                           verbose = TRUE)
toks_repub <- tokens_select(repub, pattern = "duty", window = 10,
                            selection = "keep", padding = FALSE,
                            verbose = TRUE)
## build a feature co-occurrence matrix (fcm) per party; with weighted counts
## a co-occurrence contributes less the further apart the two words are
dfcmat_demo <- fcm(toks_demo, context = "window", window = 10,
                   count = "weighted", tri = FALSE)
dfcmat_repub <- fcm(toks_repub, context = "window", window = 10,
                    count = "weighted", tri = FALSE)
dfcmat_demo # have a look at one of the fcms
## Feature co-occurrence matrix of: 641 by 641 features.
##          features
## features  life     event day month       one      hand country voice health time
##   life       0 0           0     0 0         0               0     0      0    0
##   event      0 0           0     0 0.1428571 0               0     0      0    0
##   day        0 0           0     0 0         0               0     0      0    0
##   month      0 0           0     0 0         0               0     0      0    0
##   one        0 0.1428571   0     0 0         0.1666667       0     0      0    0
##   hand       0 0           0     0 0.1666667 0               0     0      0    0
##   country    0 0           0     0 0         0               0     0      0    0
##   voice      0 0           0     0 0         0               0     0      0    0
##   health     0 0           0     0 0         0               0     0      1    0
##   time       0 0           0     0 0         0               0     0      0    0
## [ reached max_feat ... 631 more features, reached max_nfeat ... 631 more features ]
## reduce each fcm to its 51 highest-ranked features
## (presumably the keyword itself plus its 50 top co-occurring terms)
dfcmat_demo <- fcm_select(
  x = dfcmat_demo,
  selection = "keep",
  pattern = names(topfeatures(dfcmat_demo, n = 51))
)
dfcmat_repub <- fcm_select(
  x = dfcmat_repub,
  selection = "keep",
  pattern = names(topfeatures(dfcmat_repub, n = 51))
)

Visualization

## relative weight of each term: its total co-occurrence weight divided by the
## smallest total in the matrix (so the weakest term gets the value 1).
## Computed once and reused for both the label size and the vertex size.
rel_freq_demo <- rowSums(dfcmat_demo) / min(rowSums(dfcmat_demo))
## word-label sizes grow with the relative weight of the term
label_sizes_demo <- rel_freq_demo * 1.2
## override the size for the keyword 'duty' itself
## (otherwise its label cannibalizes the whole space of the plot)
label_sizes_demo['duty'] <- 0.1
set.seed(123) # set seed for reproducibility of the network layout
p_demo <- quanteda.textplots::textplot_network(dfcmat_demo, 
                                              min_freq = 0.5,
                                              edge_alpha = 0.2,
                                              vertex_size = rel_freq_demo / 8,
                                              vertex_labelsize = label_sizes_demo,
                                              edge_color = 'dodgerblue') + 
      # additional theme tweaks
      labs(title = 'Democratic: duty') +
      theme(plot.title = element_text(face = 'bold'))
## same for republicans
rel_freq_repub <- rowSums(dfcmat_repub) / min(rowSums(dfcmat_repub))
label_sizes_repub <- rel_freq_repub * 1.2
label_sizes_repub['duty'] <- 0.1
## re-seed before every plot (as in the loop version of this analysis), so the
## layout does not depend on the RNG state left behind by the previous plot
set.seed(123)
p_repub <- quanteda.textplots::textplot_network(dfcmat_repub, 
                                              min_freq = 0.5,
                                              edge_alpha = 0.2,
                                              vertex_size = rel_freq_repub / 8,
                                              vertex_labelsize = label_sizes_repub,
                                              edge_color = 'firebrick') + 
      # additional theme tweaks
      labs(title = 'Republican: duty') +
      theme(plot.title = element_text(face = 'bold'))
## plot both networks side by side
grid.arrange(p_demo, p_repub, ncol = 2)

Serialize with a loop

## feature co-occurrence matrix + network plot for every party/keyword pair
container <- list() # plot-container, keyed by '<party>: <keyword>'
for (m in c('Democratic', 'Republican')) { # loop over party
  ## subset data by the party-affiliation in <m>
  ## (does not depend on the keyword, so it is done once per party)
  toks_party <- tokens_subset(toks, Party == m)
  ## edge colour per party; plain if/else because <m> is a single string
  edge_col <- if (m == 'Republican') 'firebrick' else 'dodgerblue'
  for (i in query) { # loop over keywords
    ## select tokens within +/- 10 words around the keyword in <i>
    toks_sel <- tokens_select(toks_party, pattern = i, selection = "keep",
                              window = 10, padding = FALSE, verbose = TRUE)
    ## create feature co-occurrence matrix (fcm) with weights within the window
    ## the more distance between the words, the less weight the co-occurrence gets
    dfcmat <- fcm(toks_sel, context = 'window', window = 10, 
                  count = 'weighted', tri = FALSE)
    ## names of the 51 top features of the fcm
    ## (presumably the keyword in <i> plus its 50 top co-occurrences)
    feat <- names(topfeatures(dfcmat, 51))
    ## subset the fcm to those top features
    dfcmat_sel <- fcm_select(dfcmat, pattern = feat, selection = "keep")
    ## create plot
    # relative weight of each term (weakest term = 1), computed once and
    # reused for label and vertex sizes
    rel_freq <- rowSums(dfcmat_sel) / min(rowSums(dfcmat_sel))
    # word-label sizes grow with the relative weight of the term
    label_sizes <- rel_freq * 0.8
    # override the size for the term in <i> (cannibalizes the whole space of
    # the plot); guarded, because assigning to a missing name would append an
    # element and make the size vector longer than the number of vertices
    if (i %in% names(label_sizes)) {
      label_sizes[i] <- 0.1
    }
    set.seed(123) # set seed for reproducibility
    p <- quanteda.textplots::textplot_network(dfcmat_sel, 
                                              min_freq = 0.5,
                                              edge_alpha = 0.2,
                                              vertex_size = rel_freq / 8,
                                              vertex_labelsize = label_sizes,
                                              edge_color = edge_col) + 
      # additional theme tweaks
      labs(title = paste0(m, ': ', i)) +
      theme(plot.title = element_text(face = 'bold'))
    ## populate the container
    container[[paste0(m, ': ', i)]] <- p 
  }
}
## plot panel: list the available plots first
names(container)
##  [1] "Democratic: progress" "Democratic: spirit"   "Democratic: world"    "Democratic: nation"   "Democratic: duty"     "Democratic: war"      "Republican: progress" "Republican: spirit"   "Republican: world"    "Republican: nation"   "Republican: duty"     "Republican: war"
## arrange the plots in two columns, one row per keyword:
## Democratic plots (positions 1-6) on the left, Republican (7-12) on the right.
## rbind() + as.vector() interleaves the positions to 1, 7, 2, 8, ..., 6, 12.
grid.arrange(
  grobs = unname(container[as.vector(rbind(1:6, 7:12))]),
  ncol = 2
)

 




A work by Lucien Baumgartner

https://lucienbaumgartner.github.io/